import  time,pymysql
from selenium import webdriver
from selenium.webdriver import ChromeOptions
from datetime import datetime
from crawlab import save_item

# MySQL connection used to persist scraped community rows.
# NOTE(review): credentials are hard-coded — consider loading them from
# environment variables or a config file before sharing this script.
conn = pymysql.connect(
    host="172.16.10.201",
    user="zy001",
    port=3306,
    password="zy@123",
    database="crawl",
    # NOTE(review): MySQL 'utf8' is the 3-byte subset; 'utf8mb4' is usually
    # preferred for full Unicode — confirm against the table's charset.
    charset = 'utf8'
)
cursor = conn.cursor()
# Parameterized INSERT template; one scraped community per value tuple
# (community, url_list, url, builder_year, area, create_time).
sql = "insert into beike_community(community,url_list,url,builder_year,area,create_time) values(%s,%s,%s,%s,%s,%s)"

# Headless Chrome configuration for the scrape.
option = ChromeOptions()
# Chrome content setting: 2 = block image loading (speeds up page fetches).
prefs = {
        'profile.default_content_setting_values': {
            'images': 2
        }
    }
option.headless = True  # Selenium 3-style headless toggle
option.add_experimental_option('prefs', prefs)
# Hide the "Chrome is being controlled by automated software" banner and
# the enable-automation switch that some sites use to detect bots.
option.add_experimental_option('excludeSwitches', ['enable-automation'])
option.add_argument('--ignore-certificate-errors')
# NOTE(review): executable_path is removed in Selenium 4; this matches the
# Selenium 3 API used elsewhere in this file (find_element_by_xpath).
driver = webdriver.Chrome(executable_path='chromedriver', options=option)
print('浏览器加载成功')
# District name -> Beike (km.ke.com) community-listing URL for Kunming.
# Built from the district's URL slug; some slugs carry a numeric suffix
# that disambiguates them on the site (xishan23, luquan1).
area_url = {
    name: f'https://km.ke.com/xiaoqu/{slug}/'
    for name, slug in (
        ('五华', 'wuhua'),
        ('盘龙', 'panlong'),
        ('官渡', 'guandu'),
        ('西山', 'xishan23'),
        ('呈贡', 'chenggong'),
        ('晋宁', 'jinning'),
        ('嵩明', 'songming'),
        ('东川', 'dongchuan'),
        ('富民', 'fumin'),
        ('宜良', 'yiliang'),
        ('石林', 'shilin'),
        ('寻甸', 'xundian'),
        ('禄劝', 'luquan1'),
        ('安宁', 'anning'),
    )
}

if __name__ == '__main__':
    # For every configured district: walk its paginated community list,
    # scrape each community card, mirror the row to crawlab (save_item)
    # and batch-insert it into MySQL.
    try:
        for area_name, list_url in area_url.items():
            print('选择房屋')
            try:
                driver.get(list_url)
                time.sleep(3)  # let the JS-rendered pager appear
                # The last pager anchor is either the max page number or the
                # "下一页" (next page) link; in the latter case the number is
                # in the second-to-last anchor.
                page_text = driver.find_element_by_xpath(
                    '//*[@id="beike"]/div[1]/div[4]/div[1]/div[4]/div[2]/div/a[last()]').text
                if '下一页' in page_text:
                    page_text = driver.find_element_by_xpath(
                        '//*[@id="beike"]/div[1]/div[4]/div[1]/div[4]/div[2]/div/a[last()-1]').text
                total_pages = int(page_text)
                # Visit every listing page of this district.
                for page_no in range(1, total_pages + 1):
                    page_url = list_url + 'pg' + str(page_no) + '/'
                    print(page_url)
                    driver.get(page_url)
                    cards = driver.find_elements_by_xpath('//div[@class="info"]')
                    rows = []
                    # Distinct names here (card/detail_url) fix the original's
                    # shadowing of the page counter `i` and page `url`.
                    for card in cards:
                        # Locate the title anchor once; reuse it for text + href.
                        title_link = card.find_element_by_xpath('./div[@class="title"]/a')
                        community = title_link.text
                        detail_url = title_link.get_attribute('href')
                        builder_year = card.find_element_by_xpath('./div[@class="positionInfo"]').text
                        if builder_year:
                            # positionInfo text is '/'-separated; the last
                            # segment holds the build year — TODO confirm format.
                            builder_year = builder_year.split('/')[-1].strip()
                        rows.append((community, list_url, detail_url, builder_year, area_name, datetime.now()))
                        # Mirror the same record to crawlab.
                        save_item({
                            'url': detail_url,
                            'url_list': list_url,
                            'community': community,
                            'builder_year': builder_year,
                            'area': area_name,
                            'create_time': datetime.now(),
                        })
                    # One batched INSERT + commit per listing page.
                    cursor.executemany(sql, rows)
                    conn.commit()
                    print('数据存储成功')
            except Exception as exc:
                # Best-effort crawl: log the failure, back off 5 minutes
                # (likely throttled/blocked), then move on to the next area.
                print(exc)
                time.sleep(300)
    finally:
        # Fix resource leak: the original never closed the browser or the
        # DB connection.
        driver.quit()
        conn.close()